library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tidytext)
library(methods)
library(alrtools)
##
## Attaching package: 'alrtools'
##
## The following objects are masked from 'package:lubridate':
##
## day, month, quarter, year
##
## The following object is masked from 'package:base':
##
## ifelse
# Install alrtools from https://github.com/adamleerich/alrtools
# devtools::install_github('adamleerich/alrtools')
# Variables that might change year-to-year
rdata_in <- 'data/wrangled.RData'
rdata_out <- 'data/eda.RData'
rdata <- alrtools::load_env(rdata_in)
tbl_responses <- rdata$tbl_responses
tbl_demographic <- rdata$tbl_demographic
tbl_respondent_tool <- rdata$tbl_respondent_tool
tbl_suitability <- rdata$tbl_suitability
tbl_techniques <- rdata$tbl_techniques
tbl_barrier <- rdata$tbl_barrier
tbl_responses$designation <- tbl_responses$actuarial_credential
tbl_demographic$designation <- tbl_demographic$actuarial_credential
tbl_respondent_tool$designation <- tbl_respondent_tool$actuarial_credential
years <- sort(unique(tbl_responses$year))
tbl_respondents <- tbl_responses %>%
group_by(year) %>%
summarise(
n_respondents = n() )
tbl_respondents
## # A tibble: 3 × 2
## year n_respondents
## <int> <int>
## 1 2021 1294
## 2 2022 646
## 3 2023 819
bar_fill_colors <- c(
'#FED35D'
, '#B4A3BB'
, "#D0E4F4"
, '#1269FB'
, '#142345')
augment_shifted_stack <- function(plt_in, fill_labels){
plt_in +
geom_col() +
theme_minimal() +
coord_flip() +
geom_hline(yintercept = 0) +
scale_fill_manual(
values = bar_fill_colors
, limits = fill_labels
) +
labs(x = 'Tool', y = '# of users') +
scale_y_continuous(labels = abs)
}
str_additional_stops <- c(
'tools',
'software',
'reserving',
'modeling',
'data',
'risk',
'capital',
'excel',
'actuarial',
'specific')
tbl_tools_added <- tbl_responses %>%
filter(!is.na(what_other_tools_should_be_added)) %>%
filter(length(what_other_tools_should_be_added) > 0) %>%
mutate(
what_other_tools_should_be_added = what_other_tools_should_be_added %>%
str_to_lower()
) %>%
select(respondent_id, what_other_tools_should_be_added) %>%
unnest_tokens(word, what_other_tools_should_be_added) %>%
anti_join(stop_words, by = "word") %>%
filter(!word %in% str_additional_stops) %>%
mutate(
word = str_to_title(word)
, word = ifelse(word == 'Vba', 'VBA', word)
)
# compare_usage <- function(tbl_in, frequency_in = 'Daily') {
#
# tbl_in %>%
# select(-respondent_id) %>%
# mutate(
# freq_val = usage_frequency == frequency_in
# ) %>%
# filter(!is.na(freq_val)) %>%
# group_by(tool, freq_val) %>%
# summarise(
# n = n()
# ) %>%
# mutate(
# pct_of_total = n / sum(n)
# ) %>%
# filter(freq_val) %>%
# arrange(desc(pct_of_total)) %>%
# select(-freq_val) %>%
# mutate(
# usage = frequency_in
# )
# }
summarise_usage <- function(tbl_in) {
tbl_in %>%
group_by(year, tool, usage_frequency) %>%
summarise(
n_total = n()
) %>%
ungroup() %>%
filter(!is.na(usage_frequency)) %>%
group_by(year, tool) %>%
mutate(
pct_tool = n_total / sum(n_total)
) %>%
group_by(year, usage_frequency) %>%
mutate(
pct_usage_frequency = n_total / sum(n_total)
) %>%
ungroup()
}
tbl_usage_summary <- tbl_respondent_tool %>%
summarise_usage()
## `summarise()` has grouped output by 'year', 'tool'. You can override using the
## `.groups` argument.
tbl_usage_summary
## # A tibble: 125 × 6
## year tool usage_frequency n_total pct_tool pct_usage_frequency
## <int> <fct> <fct> <int> <dbl> <dbl>
## 1 2021 R Daily 111 0.0867 0.0567
## 2 2021 R Weekly 118 0.0921 0.120
## 3 2021 R Monthly 123 0.0960 0.145
## 4 2021 R Less 272 0.212 0.204
## 5 2021 R Never 657 0.513 0.102
## 6 2021 Excel Daily 1215 0.943 0.621
## 7 2021 Excel Weekly 56 0.0435 0.0570
## 8 2021 Excel Monthly 8 0.00621 0.00940
## 9 2021 Excel Less 5 0.00388 0.00375
## 10 2021 Excel Never 4 0.00311 0.000623
## # ℹ 115 more rows
tbl_usage_summary %>%
filter(tool == 'R') %>%
ggplot(aes(year, pct_tool)) +
geom_line(aes(color = usage_frequency)) +
scale_x_continuous(breaks = years) +
theme_minimal()

tbl_usage_summary %>%
filter(usage_frequency == 'Daily') %>%
filter(!tool %in% c('Excel', 'MATLAB')) %>%
ggplot(aes(year, pct_tool)) +
geom_line(aes(color = tool)) +
scale_x_continuous(breaks = years) +
theme_minimal()

tbl_usage_summary %>%
filter(usage_frequency == 'Daily') %>%
filter(!tool %in% c('Excel', 'MATLAB', 'SQL')) %>%
ggplot(aes(year, pct_tool)) +
geom_line(aes(color = tool)) +
scale_x_continuous(breaks = years) +
theme_minimal()

tbl_usage_summary %>%
filter(usage_frequency == 'Daily') %>%
filter(tool %in% c('Excel', 'SQL')) %>%
ggplot(aes(year, pct_tool)) +
geom_line(aes(color = tool)) +
scale_x_continuous(breaks = years) +
theme_minimal()

tbl_usage_summary %>%
filter(usage_frequency == 'Never') %>%
filter(!tool %in% c('Excel', 'MATLAB', 'SQL')) %>%
ggplot(aes(year, pct_tool)) +
geom_line(aes(color = tool)) +
scale_x_continuous(breaks = years) +
theme_minimal()

f_usage <- c("Never", "Less", "Monthly", "Weekly", "Daily") %>%
as_factor()
plt_usage_summary <- function(tbl_summary_in){
tbl_summary_in %>%
ggplot(aes(tool, n_total, fill = usage_frequency)) %>%
augment_shifted_stack(f_usage) +
facet_grid(rows = vars(year)) +
labs(fill = "Frequency")
}
f_unused_tools <- tbl_usage_summary %>%
filter(usage_frequency == 'Never') %>%
arrange(desc(pct_tool)) %>%
pull(tool) %>%
unique() %>%
as.character()
# setdiff(tbl_respondent_tool$tool, f_unused_tools)
# Kevin tried adding facet_wrap in plt_usage_summary,
# but it didn't look good
for(i in 1:length(years)) {
plot_title <- paste("Basic Usage For", years[i])
plt_basic_usage <- tbl_usage_summary %>%
filter(year == years[i]) %>%
mutate(
n_total = n_total * ifelse(
usage_frequency == 'Never'
, -1
, 1)
) %>%
mutate(
tool = tool %>% fct_relevel(f_unused_tools)
) %>%
plt_usage_summary() +
ggtitle(plot_title, subtitle = waiver())
print(plt_basic_usage)
}



tbl_summary_by_age <- map_dfr(levels(tbl_responses$age), function(x){
tbl_respondent_tool %>%
inner_join(tbl_demographic) %>%
filter(age == x) %>%
summarise_usage() %>%
mutate(
age = x
)
}) %>%
mutate(
age = age %>% fct_relevel(levels(tbl_responses$age))
, usage_frequency = fct_rev(usage_frequency)
, tool = tool %>%
as.factor() %>%
fct_relevel(f_unused_tools) %>%
fct_rev()
) %>%
filter(!is.na(age))
## Joining with `by = join_by(respondent_id, year, actuarial_credential, age,
## where_are_you_located, what_type_of_company_do_you_work_for,
## actuaries_at_my_organization, years_of_experience, designation)`
## `summarise()` has grouped output by 'year', 'tool'. You can override using the
## `.groups` argument.
## Joining with `by = join_by(respondent_id, year, actuarial_credential, age,
## where_are_you_located, what_type_of_company_do_you_work_for,
## actuaries_at_my_organization, years_of_experience, designation)`
## `summarise()` has grouped output by 'year', 'tool'. You can override using the
## `.groups` argument.
## Joining with `by = join_by(respondent_id, year, actuarial_credential, age,
## where_are_you_located, what_type_of_company_do_you_work_for,
## actuaries_at_my_organization, years_of_experience, designation)`
## `summarise()` has grouped output by 'year', 'tool'. You can override using the
## `.groups` argument.
## Joining with `by = join_by(respondent_id, year, actuarial_credential, age,
## where_are_you_located, what_type_of_company_do_you_work_for,
## actuaries_at_my_organization, years_of_experience, designation)`
## `summarise()` has grouped output by 'year', 'tool'. You can override using the
## `.groups` argument.
## Joining with `by = join_by(respondent_id, year, actuarial_credential, age,
## where_are_you_located, what_type_of_company_do_you_work_for,
## actuaries_at_my_organization, years_of_experience, designation)`
## `summarise()` has grouped output by 'year', 'tool'. You can override using the
## `.groups` argument.
plot_usage_by_category <- function(plt_in){
plt_in +
geom_bar(stat = 'identity', position = 'fill') +
facet_wrap(~ tool) +
theme_minimal() +
geom_hline(yintercept = 0.5) +
scale_fill_manual(
values = bar_fill_colors
, limits = f_usage
) +
labs(x = NULL, y = '% of respondents') +
scale_y_continuous(labels = scales::percent, breaks = c(0, 0.5, 1))
}
for(i in 1:length(years)) {
plot_title <- paste("Usage By Age For", years[i])
plt_usage_by_age <- tbl_summary_by_age %>%
filter(year == years[i]) %>%
ggplot(aes(age, n_total, fill = usage_frequency)) %>%
plot_usage_by_category() +
coord_flip() +
ggtitle(plot_title, subtitle = waiver())
print(plt_usage_by_age)
}



tbl_summary_by_designation <- map_dfr(levels(tbl_responses$designation), function(x){
tbl_respondent_tool %>%
inner_join(tbl_demographic) %>%
filter(designation == x) %>%
summarise_usage() %>%
mutate(
designation = x
)
}) %>%
mutate(
designation = designation %>% fct_relevel(levels(tbl_responses$designation))
, usage_frequency = fct_rev(usage_frequency)
, tool = tool %>%
as.factor() %>%
fct_relevel(f_unused_tools) %>%
fct_rev()
)
## Joining with `by = join_by(respondent_id, year, actuarial_credential, age,
## where_are_you_located, what_type_of_company_do_you_work_for,
## actuaries_at_my_organization, years_of_experience, designation)`
## `summarise()` has grouped output by 'year', 'tool'. You can override using the
## `.groups` argument.
## Joining with `by = join_by(respondent_id, year, actuarial_credential, age,
## where_are_you_located, what_type_of_company_do_you_work_for,
## actuaries_at_my_organization, years_of_experience, designation)`
## `summarise()` has grouped output by 'year', 'tool'. You can override using the
## `.groups` argument.
## Joining with `by = join_by(respondent_id, year, actuarial_credential, age,
## where_are_you_located, what_type_of_company_do_you_work_for,
## actuaries_at_my_organization, years_of_experience, designation)`
## `summarise()` has grouped output by 'year', 'tool'. You can override using the
## `.groups` argument.
plt_usage_by_designation <- tbl_summary_by_designation %>%
ggplot(aes(designation, n_total, fill = usage_frequency)) %>%
plot_usage_by_category()
plt_usage_by_designation +
coord_flip()

tbl_polyglot <- tbl_respondent_tool %>%
filter(!is.na(usage_frequency)) %>%
group_by(respondent_id) %>%
summarise(
n_tools = sum(usage_frequency != 'Never')
) %>%
group_by(n_tools) %>%
summarise(
total_users = n()
) %>%
mutate(
pct_users = total_users / sum(total_users)
)
plt_polyglot <- tbl_polyglot %>%
ggplot(aes(n_tools, pct_users)) +
geom_bar(stat = 'identity', fill = bar_fill_colors[5]) +
labs(x = "# of tools", y = "% of respondents") +
scale_x_continuous(breaks = 1:9, labels = 1:9 %>% as.character()) +
scale_y_continuous(labels = scales::percent) +
theme_minimal()
plt_polyglot

tbl_polyglot_ge_week <- tbl_respondent_tool %>%
filter(usage_frequency %in% c('Daily', 'Weekly')) %>%
filter(!is.na(usage_frequency)) %>%
group_by(respondent_id) %>%
summarise(
n_tools = sum(usage_frequency != 'Never')
) %>%
group_by(n_tools) %>%
summarise(
total_users = n()
) %>%
mutate(
pct_users = total_users / sum(total_users)
)
plt_polyglot_ge_week <- tbl_polyglot_ge_week %>%
ggplot(aes(n_tools, pct_users)) +
geom_bar(stat = 'identity', fill = bar_fill_colors[5]) +
labs(x = "# of tools", y = "% of respondents") +
scale_x_continuous(breaks = 1:9, labels = 1:9 %>% as.character()) +
scale_y_continuous(labels = scales::percent) +
theme_minimal()
plt_polyglot_ge_week

# What percent of people use at least one tool daily or weekly?
tbl_respondent_tool %>%
filter(ge_week) %>%
group_by(year, respondent_id) %>%
summarize() %>%
group_by(year) %>%
summarize(at_least_one_ge_week = n()) %>%
inner_join(tbl_respondents) %>%
mutate(prop = at_least_one_ge_week / n_respondents)
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
## Joining with `by = join_by(year)`
## # A tibble: 3 × 4
## year at_least_one_ge_week n_respondents prop
## <int> <int> <int> <dbl>
## 1 2021 1284 1294 0.992
## 2 2022 638 646 0.988
## 3 2023 813 819 0.993
excel_users <- tbl_respondent_tool$respondent_id[
tbl_respondent_tool$tool == 'Excel'
& tbl_respondent_tool$usage_frequency != 'Never'
& !is.na(tbl_respondent_tool$usage_frequency)
]
tbl_polyglot_excel <- tbl_respondent_tool %>%
filter(
respondent_id %in% excel_users
) %>%
filter(!is.na(usage_frequency)) %>%
group_by(respondent_id) %>%
summarise(
n_tools = sum(usage_frequency != 'Never')
) %>%
group_by(n_tools) %>%
summarise(
total_users = n()
)
tbl_other_tools <- tibble(
other_tools = tbl_responses$what_other_tools_should_be_added %>%
tolower()
) %>%
filter(!is.na(other_tools)) %>%
count(other_tools)
summarise_proficiency <- function(tbl_in) {
tbl_in %>%
group_by(tool, proficiency) %>%
summarise(
n_total = n()
) %>%
ungroup() %>%
filter(!is.na(proficiency)) %>%
group_by(tool) %>%
mutate(
pct_tool = n_total / sum(n_total)
) %>%
group_by(proficiency) %>%
mutate(
pct_proficiency = n_total / sum(n_total)
) %>%
ungroup()
}
f_how_proficient <- c("None", "Basic", "Intermediate", "Expert", "Advanced") %>%
as_factor()
plt_proficiency_summary <- function(tbl_summary_in){
tbl_summary_in %>%
ggplot(aes(tool, n_total, fill = proficiency)) %>%
augment_shifted_stack(f_how_proficient) +
labs(fill = "Proficiency")
}
tbl_proficiency_summary <- tbl_respondent_tool %>%
filter(tool != 'MATLAB') %>%
summarise_proficiency()
## `summarise()` has grouped output by 'tool'. You can override using the
## `.groups` argument.
f_not_proficient_tools <- tbl_proficiency_summary %>%
filter(proficiency == 'Never') %>%
arrange(desc(pct_tool)) %>%
pull(tool) %>%
as.character()
plt_proficiency <- tbl_proficiency_summary %>%
mutate(
n_total = n_total * ifelse(
proficiency == 'Never'
, -1
, 1)
) %>%
mutate(
tool = tool %>% fct_relevel(f_not_proficient_tools)
) %>%
plt_proficiency_summary()
plt_proficiency

tbl_proficiency_v_usage <- tbl_respondent_tool %>%
filter(
!is.na(proficiency)
, !is.na(usage_frequency)
) %>%
group_by(tool, usage_frequency, proficiency) %>%
summarise(
n_total = n()
)
## `summarise()` has grouped output by 'tool', 'usage_frequency'. You can override
## using the `.groups` argument.
# %>%
# pivot_wider(
# values_from = n_total
# , names_from = proficiency
# )
## tbl_proficiency_v_usage %>%
## filter(tool == 'R') %>%
## ggplot(aes(aes(tool, n_total, fill = usage_frequency))) +
## geom_bar(stat = 'identity', position = 'fill') +
## facet_wrap(~ proficiency)
compare_proficiency <- function(tbl_in, proficiency_in = 'Expert') {
tbl_in %>%
select(-respondent_id) %>%
mutate(
prof_val = proficiency == proficiency_in
) %>%
filter(!is.na(prof_val)) %>%
group_by(tool, prof_val) %>%
summarise(
n = n()
) %>%
mutate(
pct_of_total = n / sum(n)
) %>%
filter(prof_val) %>%
arrange(desc(pct_of_total)) %>%
select(-prof_val) %>%
mutate(
proficiency = proficiency_in
)
}
plt_proficiency_summary <- function(tbl_summary_in){
tbl_summary_in %>%
mutate(proficiency = fct_rev(proficiency)) %>%
ggplot(aes(tool, n_total, fill = proficiency)) +
geom_col() +
theme_minimal() +
coord_flip() +
geom_hline(yintercept = 0) +
scale_fill_manual(
values = c("blue", "red", "yellow", "orange", "pink") %>% rev()
)
}
tbl_respondent_tool %>%
compare_proficiency("Basic")
## `summarise()` has grouped output by 'tool'. You can override using the
## `.groups` argument.
## # A tibble: 9 × 4
## # Groups: tool [9]
## tool n pct_of_total proficiency
## <fct> <int> <dbl> <chr>
## 1 Power BI 912 0.332 Basic
## 2 R 858 0.313 Basic
## 3 Tableau 782 0.285 Basic
## 4 SAS 712 0.259 Basic
## 5 SQL 687 0.250 Basic
## 6 Python 646 0.237 Basic
## 7 Google Sheets 617 0.225 Basic
## 8 MATLAB 125 0.0971 Basic
## 9 Excel 21 0.00764 Basic
tbl_respondent_tool %>%
compare_proficiency("None")
## `summarise()` has grouped output by 'tool'. You can override using the
## `.groups` argument.
## # A tibble: 9 × 4
## # Groups: tool [9]
## tool n pct_of_total proficiency
## <fct> <int> <dbl> <chr>
## 1 MATLAB 1132 0.880 None
## 2 Python 1607 0.588 None
## 3 Tableau 1521 0.554 None
## 4 Power BI 1305 0.475 None
## 5 SAS 1169 0.425 None
## 6 R 1045 0.381 None
## 7 Google Sheets 948 0.345 None
## 8 SQL 625 0.228 None
## 9 Excel 5 0.00182 None
tbl_suitability_summary <- tbl_suitability %>%
group_by(practice_area, tool, suitability) %>%
summarise(n_total = n()) %>%
group_by(practice_area, tool) %>%
mutate(
pct_tool_practice = n_total / sum(n_total)
) %>%
ungroup()
## `summarise()` has grouped output by 'practice_area', 'tool'. You can override
## using the `.groups` argument.
# tbl_suitability_summary %>%
# group_by(practice_area, suitability) %>%
# summarise(
# n_total = sum(n_total)
# ) %>%
# group_by(practice_area) %>%
# mutate(
# pct_total = n_total / sum(n_total)
# )
f_suited <- c("Unsure", "Not", "Somewhat", "Very") %>%
as_factor()
plot_suitability <- function(tbl_in){
f_very_much_so <- tbl_in %>%
filter(suitability == 'Very') %>%
group_by(tool) %>%
summarise(n_total = sum(n_total)) %>%
arrange(n_total) %>%
pull(tool) %>%
as.character()
tbl_in %>%
filter(!is.na(suitability)) %>%
mutate(
n_total = n_total * ifelse(suitability == 'Unsure', -1, 1)
, suitability = fct_rev(suitability)
, tool = fct_relevel(tool, f_very_much_so)
) %>%
ggplot(aes(tool, n_total, fill = suitability)) %>%
augment_shifted_stack(f_suited) +
labs(fill = 'Suitability') +
scale_y_continuous(lim = c(-1e3, 1250), labels = abs)
}
plt_suitability_ratemaking <- tbl_suitability_summary %>%
filter(practice_area == 'Ratemaking') %>%
plot_suitability() +
labs(title = 'Ratemaking')
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
plt_suitability_reserving <- tbl_suitability_summary %>%
filter(practice_area == 'Reserving') %>%
plot_suitability() +
labs(title = 'Reserving')
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
plt_suitability_capital <- tbl_suitability_summary %>%
filter(practice_area == 'Capital Modeling') %>%
plot_suitability() +
labs(title = 'Capital Modeling')
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
plot_suitability_by_practice_area <- function(tbl_in, by_tool = FALSE){
if (!by_tool) {
tbl_plot <- tbl_in %>%
group_by(practice_area, suitability)
} else {
tbl_plot <- tbl_in %>%
group_by(practice_area, suitability, tool)
}
tbl_plot %>%
summarise(
n_total = sum(n_total)
) %>%
ungroup() %>%
mutate(
practice_area = practice_area %>%
as_factor() %>%
fct_relevel(c('Ratemaking', 'Reserving', 'Capital Modeling')) %>%
fct_rev()
, n_total = n_total * ifelse(suitability == 'Unsure', -1, 1)
, suitability = suitability %>% fct_rev()
) %>%
ggplot(aes(practice_area, n_total, fill = suitability)) %>%
augment_shifted_stack(f_suited) +
labs(fill = 'Suitability') +
scale_y_continuous(lim = c(-9e3, 9e3), labels = abs)
}
plt_suitability_by_practice <- tbl_suitability_summary %>%
filter(!is.na(suitability)) %>%
plot_suitability_by_practice_area() +
labs(title = 'Suitability by practice area', subtitle = 'Non-responses removed')
## `summarise()` has grouped output by 'practice_area'. You can override using the
## `.groups` argument.
## Scale for y is already present. Adding another scale for y, which will replace
## the existing scale.
plt_suitability_by_practice
## Warning: Removed 2 rows containing missing values (`geom_col()`).

plt_suitability_by_practice_na <- tbl_suitability_summary %>%
mutate(suitability = suitability %>% fct_explicit_na('Unsure')) %>%
plot_suitability_by_practice_area() +
labs(title = 'Suitability by practice area', subtitle = "Non-responses grouped with 'Unsure'")
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `suitability = suitability %>% fct_explicit_na("Unsure")`.
## Caused by warning:
## ! `fct_explicit_na()` was deprecated in forcats 1.0.0.
## ℹ Please use `fct_na_value_to_level()` instead.
## `summarise()` has grouped output by 'practice_area'. You can override using the
## `.groups` argument.
## Scale for y is already present. Adding another scale for y, which will replace
## the existing scale.
plt_suitability_by_practice_na
## Warning: Removed 2 rows containing missing values (`position_stack()`).
## Warning: Removed 2 rows containing missing values (`geom_col()`).

plt_suitability_by_practice_excel <- tbl_suitability_summary %>%
filter(tool == 'Excel') %>%
mutate(
suitability = fct_explicit_na(suitability, na_level = 'Unsure')
) %>%
plot_suitability_by_practice_area() +
labs(title = 'Suitability by practice area for Excel', subtitle = "Non-responses grouped with 'Unsure'") +
scale_y_continuous(lim = c(-1200, 1200), labels = abs)
## `summarise()` has grouped output by 'practice_area'. You can override using the
## `.groups` argument.
## Scale for y is already present. Adding another scale for y, which will replace
## the existing scale.
## Scale for y is already present. Adding another scale for y, which will replace
## the existing scale.
plt_suitability_by_practice_excel
## Warning: Removed 3 rows containing missing values (`position_stack()`).
## Warning: Removed 2 rows containing missing values (`geom_col()`).

f_suitable_tools <- tbl_suitability_summary %>%
filter(suitability %in% c('Somewhat', 'Very')) %>%
group_by(tool) %>%
summarise(n_total = sum(n_total)) %>%
arrange(desc(n_total)) %>%
pull(tool)
plt_suitability_by_practice_all <- tbl_suitability_summary %>%
mutate(
suitability = fct_explicit_na(suitability, na_level = 'Unsure')
, tool = fct_relevel(tool, f_suitable_tools)
) %>%
plot_suitability_by_practice_area(by_tool = TRUE) +
facet_wrap(~ tool) +
labs(title = 'Suitability by practice area for all tools', subtitle = "Non-responses grouped with 'Unsure'") +
scale_y_continuous(lim = c(-1200, 1200), labels = abs)
## `summarise()` has grouped output by 'practice_area', 'suitability'. You can
## override using the `.groups` argument.
## Scale for y is already present. Adding another scale for y, which will replace
## the existing scale.
## Scale for y is already present. Adding another scale for y, which will replace
## the existing scale.
plt_suitability_by_practice_all
## Warning: Removed 18 rows containing missing values (`position_stack()`).
## Warning: Removed 17 rows containing missing values (`geom_col()`).

compare_suitability <- function(tbl_in, suitability_in = 'Very') {
tbl_in %>%
select(-respondent_id) %>%
mutate(
suit_val = suitability == suitability_in
) %>%
filter(!is.na(suit_val)) %>%
group_by(tool, practice_area, suit_val) %>%
summarise(
n = n()
) %>%
mutate(
pct_of_total = n / sum(n)
) %>%
filter(suit_val) %>%
arrange(desc(pct_of_total)) %>%
select(-suit_val) %>%
mutate(
suitability = suitability_in
)
}
# tbl_responses %>%
# compare_suitability() %>%
# filter(practice_area == "ratemaking")
#
# tbl_responses %>%
# compare_suitability() %>%
# filter(practice_area == "reserving")
#
# tbl_responses %>%
# compare_suitability() %>%
# filter(practice_area == "capital modeling")
# tbl_responses %>%
# compare_suitability("Never") %>%
# group_by(practice_area) %>%
# arrange(desc(pct_of_total), .by_group = TRUE) %>%
# select(practice_area, tool, n, pct_of_total) %>%
# View()
# tbl_responses %>%
# compare_suitability("Never") %>%
# filter(practice_area == "ratemaking")
#
# tbl_responses %>%
# compare_suitability("Unsure") %>%
# filter(practice_area == "capital modeling")
#
# tbl_responses %>%
# compare_suitability() %>%
# filter(tool == "Excel")
#
# tbl_responses %>%
# compare_suitability() %>%
# filter(tool == "R")
suitability_tool <- function(tbl_in, tool_in){
tbl_in %>%
melt_suitability() %>%
filter(tool == tool_in) %>%
filter(!is.na(suitability)) %>%
group_by(practice_area, suitability) %>%
summarise(
n = n()
) %>%
group_by(practice_area) %>%
mutate(
pct_of_total = n / sum(n)
) %>%
arrange(desc(pct_of_total))
}
# tbl_responses %>%
# suitability_tool("R")
# mojo <- inner_join(
# tbl_responses %>% melt_proficiency()
# , tbl_responses %>% melt_suitability()
# , by = c('respondent_id', 'tool')
# ) %>%
# filter(!is.na(suitability))
summarise_increase_proficiency <- function(tbl_in, vars){
tbl_in %>%
summarise(
n_increase = sum(increase_proficiency)
, n_total = n()
) %>%
mutate(
pct_increase = n_increase / n_total
) %>%
ungroup()
}
tbl_increase_proficiency_summary <- tbl_respondent_tool %>%
group_by(tool) %>%
summarise_increase_proficiency() %>%
arrange(n_increase) %>%
mutate(tool = tool %>% as.character() %>% as_factor())
plt_increase_proficiency <- tbl_increase_proficiency_summary %>%
ggplot(aes(tool, n_increase)) +
geom_bar(position = 'stack', stat = 'identity', fill = bar_fill_colors[5]) +
coord_flip() +
theme_minimal()
plt_increase_proficiency

plt_increase_proficiency_pct <- tbl_increase_proficiency_summary %>%
ggplot(aes(tool, pct_increase)) +
geom_bar(position = 'stack', stat = 'identity', fill = bar_fill_colors[5]) +
scale_y_continuous(labels = scales::percent, limits = c(0,1)) +
coord_flip() +
theme_minimal()
plt_increase_proficiency_pct

plot_proficiency <- function(tbl_in){
tbl_in %>%
ggplot(aes(category, pct_increase)) +
geom_bar(stat = 'identity') +
scale_y_continuous(limits = c(0,1)) +
theme_minimal() +
facet_wrap(~tool) +
geom_hline(yintercept = 0.5)
}
tbl_respondent_tool %>%
filter(!is.na(designation)) %>%
group_by(tool, designation) %>%
summarise_increase_proficiency() %>%
rename(category = designation) %>%
plot_proficiency()
## `summarise()` has grouped output by 'tool'. You can override using the
## `.groups` argument.

tbl_respondent_tool %>%
filter(!is.na(what_type_of_company_do_you_work_for)) %>%
group_by(tool, what_type_of_company_do_you_work_for) %>%
summarise_increase_proficiency() %>%
rename(category = what_type_of_company_do_you_work_for) %>%
plot_proficiency()
## `summarise()` has grouped output by 'tool'. You can override using the
## `.groups` argument.

tbl_respondent_tool %>%
filter(!is.na(actuaries_at_my_organization)) %>%
group_by(tool, actuaries_at_my_organization) %>%
summarise_increase_proficiency() %>%
rename(category = actuaries_at_my_organization) %>%
plot_proficiency()
## `summarise()` has grouped output by 'tool'. You can override using the
## `.groups` argument.

tbl_respondent_tool %>%
filter(!is.na(years_of_experience)) %>%
group_by(tool, years_of_experience) %>%
summarise_increase_proficiency() %>%
rename(category = years_of_experience) %>%
plot_proficiency()
## `summarise()` has grouped output by 'tool'. You can override using the
## `.groups` argument.

increase_proficiency_pct <- function(tbl_in, tool_in = 'Excel'){
tbl_in %>%
filter(tool == tool_in) %>%
filter(!is.na(usage_frequency), !is.na(proficiency)) %>%
mutate(
increase_proficiency = ifelse(increase_proficiency, 'will', 'wont')
) %>%
group_by(usage_frequency, proficiency, increase_proficiency) %>%
summarise(n_total = n()) %>%
ungroup() %>%
pivot_wider(names_from = increase_proficiency, values_from = n_total, values_fill = 0) %>%
arrange(desc(usage_frequency), desc(proficiency)) %>%
select(usage_frequency, proficiency, will, wont) %>%
mutate(
n_total = will + wont
, pct_increase = will / n_total
)
}
plot_increase_proficiency <- function(tbl_in, tool_in) {
str_title <- paste0('# who want to increase their proficiency in ', tool_in)
tbl_in %>%
filter(tool == tool_in) %>%
filter(!is.na(usage_frequency), !is.na(proficiency)) %>%
filter(increase_proficiency) %>%
group_by(usage_frequency, proficiency) %>%
summarise(n_total = n()) %>%
ggplot(aes(proficiency, usage_frequency, fill = n_total)) +
geom_tile(color = 'white') +
theme_minimal() +
theme(
legend.position = 'bottom'
) +
scale_fill_gradient(low = bar_fill_colors[1], high = bar_fill_colors[5]) +
labs(
x = 'Proficiency'
, y = 'Usage frequency'
, title = str_title
, fill = '# of respondents'
)
}
tbl_respondent_tool %>%
plot_increase_proficiency('Excel')
## `summarise()` has grouped output by 'usage_frequency'. You can override using
## the `.groups` argument.

tbl_respondent_tool %>%
plot_increase_proficiency('R')
## `summarise()` has grouped output by 'usage_frequency'. You can override using
## the `.groups` argument.

tbl_respondent_tool %>%
plot_increase_proficiency('SQL')
## `summarise()` has grouped output by 'usage_frequency'. You can override using
## the `.groups` argument.

tbl_respondent_tool %>%
plot_increase_proficiency('Python')
## `summarise()` has grouped output by 'usage_frequency'. You can override using
## the `.groups` argument.

tbl_barrier_summary <- tbl_barrier %>%
group_by(year, barrier) %>%
summarise(
pct_learning = sum(learning, na.rm = TRUE) / sum(!is.na(learning)),
pct_implementation = sum(implementation, na.rm = TRUE) / sum(!is.na(implementation)),
pct_use = sum(use, na.rm = TRUE) / sum(!is.na(use))
) %>%
ungroup() %>%
mutate(
barrier = str_replace_all(barrier, '_', ' ') %>%
str_to_title() %>%
str_replace_all('It', 'IT') %>%
str_replace_all('Of', 'of')
)
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
# %>% mutate(barrier = fct_reorder(barrier, pct_learning))
plt_barriers <- tbl_barrier_summary %>%
select(barrier, pct_learning, pct_implementation) %>%
pivot_longer(names_to = 'barrier_type', values_to = 'pct_yes', cols = -barrier) %>%
mutate(barrier_type = str_remove_all(barrier_type, 'pct_') %>% str_to_title()) %>%
ggplot(aes(barrier, pct_yes)) +
geom_bar(aes(fill = barrier_type), stat = 'identity', position = 'dodge') +
scale_fill_manual(values = bar_fill_colors[c(1,5)]) +
coord_flip() +
scale_y_continuous(labels = scales::percent, lim = c(0,1)) +
theme_minimal() +
labs(
x = 'Barrier'
, y = '% of respondents'
, title = 'Barriers to learning and implementation of tools as reported by respondents'
, fill = 'Type of barrier'
)
plt_barriers
## Warning: Removed 6 rows containing missing values (`geom_bar()`).

tbl_no_techniques <- tbl_techniques %>%
group_by(year, respondent_id) %>%
summarise(
n_techniques = sum(use)
) %>%
filter(n_techniques == 0)
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
tbl_technique_summary <- tbl_techniques %>%
anti_join(tbl_no_techniques, by = c('year', 'respondent_id')) %>%
group_by(year, technique) %>%
summarise(
n_use = sum(use),
n_response = n()) %>%
mutate(
pct_use = n_use / n_response
)
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
# # %>%
# group_by(technique) %>%
# mutate(
# pct_use = pct_use / sum(pct_use)
# ) %>%
# pivot_wider(values_from = pct_use, names_from = use) %>%
# rename(use_true = `TRUE`, use_false = `FALSE`) %>%
# mutate(
# technique = factor(technique)
# )
#
# tbl_technique_summary$technique <- fct_reorder(
# tbl_technique_summary$technique
# , tbl_technique_summary$use_true)
#
# tbl_technique_summary <- tbl_technique_summary%>%
# pivot_longer(cols = -technique, names_to = 'use', values_to = 'n_use') %>%
# mutate(
# use = use %>% str_remove_all('use_') %>%
# as.logical()
# )
plt_technique_summary <- tbl_technique_summary %>%
arrange(n_use) %>%
mutate(technique = as_factor(technique)) %>%
ggplot(aes(technique, pct_use)) +
geom_bar(position = 'stack', stat = 'identity', fill = bar_fill_colors[5]) +
geom_hline(yintercept = c(0.1, 0.25, 0.5), color = bar_fill_colors[1]) +
ylim(c(0,1)) +
coord_flip() +
theme_minimal() +
labs(
y = '% Use'
, x = 'Technique'
) +
scale_y_continuous(labels = scales::percent, breaks = c(0.1, 0.25, 0.5))
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
plt_technique_summary

# tbl_technique_summary_scripter <- tbl_techniques %>%
# inner_join(tbl_respondent_scripter, by = 'respondent_id') %>%
# group_by(technique, scripter, use) %>%
# summarise(pct_use = n()) %>%
# group_by(technique) %>%
# mutate(
# pct_use = pct_use / sum(pct_use)
# ) %>%
# pivot_wider(values_from = pct_use, names_from = use) %>%
# rename(use_true = `TRUE`, use_false = `FALSE`) %>%
# mutate(
# technique = factor(technique)
# )
#
# tbl_technique_summary_scripter$technique <- fct_reorder(
# tbl_technique_summary_scripter$technique
# , tbl_technique_summary_scripter$use_true)
#
# tbl_technique_summary_scripter <- tbl_technique_summary_scripter%>%
# pivot_longer(cols = c('use_false', 'use_true'), names_to = 'use', values_to = 'n_use') %>%
# mutate(
# use = use %>% str_remove_all('use_') %>%
# as.logical()
# )
# tbl_technique_summary_scripter %>%
# filter(use) %>%
# ggplot(aes(technique, n_use, fill = scripter)) +
# geom_bar(position = 'fill', stat = 'identity') +
# geom_hline(yintercept = 0.5) +
# coord_flip() +
# theme_minimal()
tbl_comment <- tbl_responses %>%
filter(!is.na(additional_comments)) %>%
mutate(additional_comments = additional_comments %>% str_to_lower()) %>%
filter(!additional_comments %in% c('none', '-', 'na', 'n/a')) %>%
select(additional_comments)
plt_age <- tbl_demographic %>%
filter(!is.na(age)) %>%
ggplot(aes(age)) +
geom_bar(fill = bar_fill_colors[5]) +
theme_minimal() +
labs(
x = "Age category"
, y = "# of respondents"
, title = "Age of respondents"
, subtitle = "NAs removed"
)
plt_age

plt_designation <- tbl_demographic %>%
filter(!is.na(designation)) %>%
ggplot(aes(designation)) +
geom_bar(fill = bar_fill_colors[5]) +
theme_minimal() +
labs(
x = "Designation"
, y = "# of respondents"
, title = "Designation of respondents"
, subtitle = "NAs removed"
)
plt_designation

plt_company_size <- tbl_demographic %>%
filter(!is.na(actuaries_at_my_organization)) %>%
ggplot(aes(actuaries_at_my_organization)) +
geom_bar(fill = bar_fill_colors[5]) +
theme_minimal() +
labs(
x = NULL
, y = "# of respondents"
, title = "# of actuaries at my organization"
, subtitle = "NAs removed"
)
plt_company_size

plt_years_of_experience <- tbl_demographic %>%
filter(!is.na(years_of_experience)) %>%
ggplot(aes(years_of_experience)) +
geom_bar(fill = bar_fill_colors[5]) +
theme_minimal() +
labs(
x = NULL
, y = "# of respondents"
, title = "Years of experience"
, subtitle = "NAs removed"
)
plt_years_of_experience

plt_location <- tbl_demographic %>%
filter(!is.na(where_are_you_located)) %>%
group_by(where_are_you_located) %>%
summarise(n_total = n()) %>%
mutate(
where_are_you_located = where_are_you_located %>%
str_remove_all(" \\(please specify\\)") %>%
as.factor() %>%
fct_reorder(n_total)
) %>%
ggplot(aes(where_are_you_located, n_total)) +
geom_bar(fill = bar_fill_colors[5], stat = 'identity') +
theme_minimal() +
labs(
x = NULL
, y = "# of respondents"
, title = "Location of respondent"
, subtitle = "NAs removed"
)
plt_location

plt_type_of_company <- tbl_demographic %>%
filter(!is.na(what_type_of_company_do_you_work_for)) %>%
group_by(what_type_of_company_do_you_work_for) %>%
summarise(n_total = n()) %>%
mutate(
what_type_of_company_do_you_work_for = what_type_of_company_do_you_work_for %>%
as.factor() %>%
fct_reorder(n_total)
) %>%
ggplot(aes(what_type_of_company_do_you_work_for, n_total)) +
geom_bar(fill = bar_fill_colors[5], stat = 'identity') +
theme_minimal() +
labs(
x = NULL
, y = "# of respondents"
, title = "Type of company of respondent"
, subtitle = "NAs removed"
) +
coord_flip()
plt_type_of_company

tbl_ai_chi <- tbl_techniques %>%
filter(technique == 'AI/deep learning') %>%
left_join(tbl_demographic, by = 'respondent_id')
chisq.test(
table(tbl_ai_chi$use, tbl_ai_chi$years_of_experience)
)
##
## Pearson's Chi-squared test
##
## data: table(tbl_ai_chi$use, tbl_ai_chi$years_of_experience)
## X-squared = 13.001, df = 4, p-value = 0.01127
chisq.test(
table(tbl_ai_chi$use, tbl_ai_chi$designation)
)
##
## Pearson's Chi-squared test
##
## data: table(tbl_ai_chi$use, tbl_ai_chi$designation)
## X-squared = 16.178, df = 2, p-value = 0.0003069
chisq.test(
table(tbl_ai_chi$use, tbl_ai_chi$age)
)
##
## Pearson's Chi-squared test
##
## data: table(tbl_ai_chi$use, tbl_ai_chi$age)
## X-squared = 11.155, df = 4, p-value = 0.02488
# chisq.test(
# table(tbl_ai_chi$use, tbl_ai_chi$where_are_you_located)
# )
chisq.test(
table(tbl_ai_chi$use, tbl_ai_chi$what_type_of_company_do_you_work_for)
)
##
## Pearson's Chi-squared test
##
## data: table(tbl_ai_chi$use, tbl_ai_chi$what_type_of_company_do_you_work_for)
## X-squared = 12.496, df = 4, p-value = 0.01402
chisq.test(
table(tbl_ai_chi$use, tbl_ai_chi$actuaries_at_my_organization)
)
##
## Pearson's Chi-squared test
##
## data: table(tbl_ai_chi$use, tbl_ai_chi$actuaries_at_my_organization)
## X-squared = 7.1898, df = 5, p-value = 0.2069
chisq.test(
table(tbl_ai_chi$use, tbl_ai_chi$years_of_experience)
)
##
## Pearson's Chi-squared test
##
## data: table(tbl_ai_chi$use, tbl_ai_chi$years_of_experience)
## X-squared = 13.001, df = 4, p-value = 0.01127
tbl_scripter <- tbl_respondent_tool %>%
group_by(respondent_id) %>%
summarise(scripter = sum(scripter) %>% pmin(1))
tbl_ai_chi <- tbl_ai_chi %>%
left_join(tbl_scripter, by = 'respondent_id')
chi_sq_scripter <- chisq.test(
table(tbl_ai_chi$use, tbl_ai_chi$scripter)
)
tbl_ai_chi$use %>% table()
## .
## FALSE TRUE
## 2578 181
64 / (415 + 64)
## [1] 0.1336117
tbls <- ls(pattern = 'tbl_')
plts <- ls(pattern = 'plt_')
fcts <- ls(pattern = 'f_')
numbers <- ls(pattern = 'n_')
funcs <- lsf.str()
save(
file = rdata_out
, list = c(tbls, plts, funcs, fcts, numbers, 'bar_fill_colors')
)